from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import re
import time

def get_html_source(url):
    """Load *url* in the shared browser and return the page's HTML source.

    Relies on the module-level ``browser`` WebDriver instance created by the
    script's ``__main__`` section.

    Args:
        url: Address to open.

    Returns:
        The value of ``browser.page_source`` after the page has been opened,
        or ``None`` if anything went wrong while loading it.
    """
    try:
        browser.get(url)
        # Fixed short pause before reading the source.
        # NOTE(review): presumably to let client-side rendering settle -- confirm.
        time.sleep(0.5)
        page_source = browser.page_source
    except Exception:
        # Not a bare ``except``: KeyboardInterrupt / SystemExit must propagate
        # so that Ctrl-C actually stops the crawl instead of being reported as
        # a failed URL.
        print('获取地址失败:%s'%url)
        return None
    print('获取地址成功:%s'%url)
    return page_source


def analysis_page_source(page_source):
    """Split raw HTML into a flat list of non-empty text fragments.

    Every ``<...>`` tag is replaced by a newline, then ASCII colons, full-width
    colons, spaces and tabs are turned into newlines as well, so a
    "label: value" pair ends up as two consecutive list entries.

    Args:
        page_source: HTML text of a page. Anything that is not a string
            (e.g. ``None`` from a failed fetch) is treated as a parse failure.

    Returns:
        A list of the non-empty fragments in document order, or ``None`` if
        *page_source* could not be processed.
    """
    try:
        # ``.`` does not match a newline here, so a tag that spans several
        # lines is left in place rather than stripped.
        text = re.sub(r'<.*?>', '\n', page_source)
    except TypeError:
        # re.sub raises TypeError for non-string input; that is the only
        # failure this function is expected to absorb.
        print('解析失败')
        return None

    # One pass instead of four chained .replace() calls.
    separators = str.maketrans(dict.fromkeys(':： \t', '\n'))
    text = text.translate(separators)

    # Equivalent to re.findall(r'.+', text): maximal runs of non-newline chars.
    html_message_list = [piece for piece in text.split('\n') if piece]
    print('解析成功啦')
    return html_message_list


def get_specs(specs_list, html_message_list):
    """Print the fragment that follows the first spec label found on the page.

    The labels in *specs_list* are tried in order. For the first label that
    occurs in *html_message_list* and has a fragment after it, that following
    fragment is printed and the search stops. Nothing is printed when no label
    matches.

    Args:
        specs_list: Candidate label strings, in priority order.
        html_message_list: Text fragments of the page, or ``None`` / empty
            when parsing failed.

    Returns:
        None. The result is written to stdout.
    """
    if not html_message_list:
        # Nothing to search (parse failure upstream or an empty page).
        return

    for label in specs_list:
        try:
            value_index = html_message_list.index(label) + 1
        except ValueError:
            # Label not present on this page; try the next candidate.
            continue
        if value_index >= len(html_message_list):
            # The label is the very last fragment, so there is no value after
            # it. Previously this raised an uncaught IndexError.
            continue
        print(html_message_list[value_index])
        break


if __name__ == '__main__':
    # Script entry point: open every URL listed in the ``url_list`` file in a
    # headless Chrome, print the spec value found on each page, then print the
    # total run time in seconds.
    begin_time = time.time()
    options = Options()
    options.add_argument('--headless')
    # The driver path is not passed positionally: on newer Selenium releases
    # the first positional parameter is ``options``, so the old call fails with
    # a duplicate-argument TypeError. Older releases already default to the
    # ``chromedriver`` executable found on PATH.
    # ``browser`` must stay a module-level name: get_html_source() reads it.
    browser = webdriver.Chrome(options=options)

    try:
        with open('url_list','r') as f:
            # strip() also removes '\r' and stray spaces, not just '\n'.
            urls = [line.strip() for line in f]

        specs_list = ['商品规格', '产品规格', '规格', '净含量', '规格名称']
        for url in urls:
            if not url:
                # Skip blank lines instead of asking the browser to open ''.
                continue
            page_source = get_html_source(url)
            html_message_list = analysis_page_source(page_source)
            get_specs(specs_list,html_message_list)
            print('*'*20)
    finally:
        # Always shut the browser down so no Chrome/chromedriver processes
        # are left behind, even when the run is interrupted or fails.
        browser.quit()

    print('执行结束')
    end_time = time.time()
    run_time = end_time - begin_time
    print(run_time)
